*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	This program implements the sample selection rules (for samples with older lagged scores)
*	----------------------------------------------------------------------------

	// positional arguments: first/last grade, first/last year, minimum enrollment,
	// minimum number at risk, and a suffix appended to the input and output file names
	args fg lg fy ly min_enroll min_at_risk bw

	// propensity score type used for the risk sample, chosen by the global city
	if "${city}"=="DEN" {
		local pscores form
	}
	else if inlist("${city}","NYC","NYCms") {
		local pscores form2
	}
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

***	clean

*	load analysis file

	// clear is the documented option of use for discarding the data in memory
	use "${cleandata}${city}_basefile`bw'", clear

*	sample selection

	//keep first grade attempts: earliest year within each student-grade pair
	//(n is kept as a variable here and dropped just before the final save)
	bys stu grade (year): gen n = _n
	drop if n>1

	//restrict to students in the grade range specified
	keep if inrange(grade, `fg', `lg')
	//grades 7 and 8 are excluded even when they fall inside the requested range
	drop if inlist(grade, 7, 8)

	//restrict to students in the year range specified
	//(both bounds are shifted forward by one year relative to the arguments)
	keep if inrange(year, `=`fy'+1', `=`ly'+1')

	//need to have outcomes: both math and ela must be non-missing
	drop if mi(math, ela)

	//rename older baselines: the existing baseline becomes the one-year lag and,
	//for the NYC samples, an older score takes over the bl_math and bl_ela names
	foreach subj in math ela {
		ren bl_`subj' bl_`subj'_1yrlag
		if "${city}" == "NYCms" ren bl_ss_`subj'_3 bl_`subj'
		if "${city}" == "NYC" ren bl_ss_`subj'_6 bl_`subj'
	}

	//need to have baselines
	//bl_ell is converted to numeric for both NYC samples before the missing-value filter
	if inlist("${city}","NYC","NYCms") destring bl_ell, replace
	foreach covar in $bl_demos bl_math bl_ela {
		drop if mi(`covar')
	}

*	baseline scores

	//squared and cubed terms of each baseline score
	foreach subj in math ela {
		forvalues power = 2/3 {
			gen bl_`subj'`power' = bl_`subj'^`power'
		}
	}

*	school size

	//number of observations with a non-missing school code, by school
	egen schsize = count(sch), by(sch)

*	NYC cleaning

	if "${city}"=="NYC"{
		//remove variables that are not used below
		drop quad* pscore*qbw* grade_code sex ethnicity birth_mm_yyyy ///
			home_lang pob_code status admit* disc* official_class ///
			poverty adcode date disability program grade_attempt ///
			right_cohor test* took* *boro* *merge bl_test_grade*

		//destring school codes: letters B K M Q R X become digits 1 to 6
		local digit 0
		foreach letter in B K M Q R X {
			local ++digit
			replace sch = subinstr(sch,"`letter'","`digit'",.)
		}
		destring sch, replace

		//apply the same letter-to-digit mapping to the school-specific variable names
		foreach oldname of varlist enr_* offer_* pscore_* {
			local newname `oldname'
			local digit 0
			foreach letter in B K M Q R X {
				local ++digit
				local newname = subinstr("`newname'","`letter'","`digit'",.)
			}

			rename `oldname' `newname'
		}

		//district number: first two characters of the long school code
		g districtnum = substr(sch_long,1,2)
		drop if inlist(districtnum,"75","79")

		drop if inlist(sch,2444,2503,3444,3501,4444,4504,5444,5505,6444,6502) // home schooling
		drop if sch == 3401 //hospital schools

		//enrollment variables of the excluded codes are removed as well
		local excluded 2444 2503 3444 3501 4444 4504 5444 5505 6444 6502 3401
		foreach code of local excluded {
			drop enr_`code'
		}
	}
	if "${city}"=="NYCms"{
		//remove variables that are not used below
		drop quad* pscore*qbw* grade_code sex ethnicity birth_mm_yyyy home_lang ///
			pob_code status admit* disc* official_class poverty days* adcode ///
			date disability program right_cohor *grade_attempt *merge ///
			bl_test_grade*

		//destring school codes: letters B K M Q R X become digits 1 to 6
		local digit 0
		foreach letter in B K M Q R X {
			local ++digit
			replace sch = subinstr(sch,"`letter'","`digit'",.)
		}
		destring sch, replace

		//apply the same letter-to-digit mapping to the school-specific variable names
		foreach oldname of varlist enr_* offer_* pscore_* {
			local newname `oldname'
			local digit 0
			foreach letter in B K M Q R X {
				local ++digit
				local newname = subinstr("`newname'","`letter'","`digit'",.)
			}

			rename `oldname' `newname'
		}

		//district number: first two characters of the long school code
		g districtnum = substr(sch_long,1,2)
		drop if inlist(districtnum,"75","79")

		//2503 is listed here because enr_2503 is dropped just below (and the NYC
		//branch lists it too); leaving rows with sch == 2503 in the data would make
		//later references to enr_2503 fail
		drop if inlist(sch,2444,2503,3444,3501,4444,4504,5444,5505,6444,6502) // home schooling
		drop if sch == 3401 //hospital schools

		//enrollment variables of the excluded codes are removed as well
		local excluded 2444 2503 3444 3501 4444 4504 5444 5505 6444 6502 3401
		foreach code of local excluded {
			drop enr_`code'
		}
	}

* 	drop small schools

	//schools whose schsize is below 10 lose both their enrollment variable and their rows
	levelsof sch if schsize < 10, local(smallschools)
	foreach tiny of local smallschools {
		drop enr_`tiny'
		drop if sch == `tiny'
	}

*	omitted school

	//omitted school is a non-lottery school: 2430 for NYC, 4145 for NYCms
	//NOTE(review): no indicator is generated for any other city, so the label
	//command below stops with an error there - confirm this is intended
	if inlist("${city}","NYC","NYCms") {
		gen omitted = (sch == cond("${city}"=="NYC", 2430, 4145))
	}
	label var omitted "Omitted school"

*	minimum enrollment

	//observations per school-year cell, then the smallest such cell for each school
	bysort sch year: gen count = _N
	bysort sch: egen min_count = min(count)

	//flag schools whose mean min_count, taken over rows with a nonzero enrollment
	//variable for that school, reaches the threshold
	//NOTE(review): a missing enrollment value counts as true in the if qualifier,
	//and a missing r(mean) also passes the comparison - confirm neither can occur here
	levelsof sch, local(allschools)
	gen ols_sch = 0
	foreach code of local allschools {
		quietly summarize min_count if enr_`code'
		if r(mean) >= `min_enroll' {
			replace ols_sch = 1 if sch == `code'
		}
	}

**	risk sample

	//sharpsample is defined outside this file; it is called once per score type
	//with the full school list and the minimum at-risk count
	foreach kind of local pscores {
		sharpsample, lotteries(`allschools') ptype(`kind') goodcell minrisk(`min_at_risk')
	}

	capture drop offersum

	//dummy out pscores
	//for every score variable, create a grouped version (name gets an i prefix on
	//the score type) and a byte flag for a score of exactly zero
	//NOTE(review): group2 is an egen function not defined in this file - verify its semantics
	foreach kind of local pscores {
		foreach score of varlist pscore_`kind'_* {
			local grouped = subinstr("`score'","`kind'","i`kind'",1)
			quietly {
				egen `grouped' = group2(`score')
				gen byte `score'_0 = (`score' == 0)
			}
		}
	}

**	drop extra vars

	//keep enrollment and exposure variables for included schools only
	//the school code is taken as the last four characters of each enrollment
	//variable name and looked up as a substring of the included-school list
	//NOTE(review): this presumes four-character school codes - confirm for every city
	levelsof sch if ols_sch, local(olsschools)
	foreach enrvar of varlist enr_* {
		local code = substr("`enrvar'",-4,.)
		if strpos("`olsschools'","`code'") > 0 continue

		//school not included: remove every variable whose name contains its code
		foreach extra of varlist *`code'* {
			capture drop `extra'
		}
	}

*	fill in missings with zeros

	//missing values in the score, offer and enrollment variables are recoded to 0
	quietly mvencode pscore_* offer* enr_*, mv(0) override

***	save

	//remove the helper variables created during sample selection
	drop n count min_count
	quietly compress

	//output name encodes the grade range, year range, thresholds and suffix
	local outfile "${builddata}${city}_analysisfile_`fg'_`lg'_`fy'_`ly'_e`min_enroll'_q`min_at_risk'`bw'_older_bl"
	save "`outfile'", replace
